import gensim

def fun1(path=r'C:\Users\Lenovo\Desktop\搜狐新闻文本\sohu_train.txt', count=3):
    """Print the first ``count`` lines of a GBK-encoded text file.

    Each line is printed as read (including its own trailing newline) and is
    followed by a ``print('\\n\\n')`` spacer so consecutive records are
    visually separated.

    Args:
        path: File to preview. Defaults to the original hard-coded location.
        count: Maximum number of lines to print. Fewer are printed when the
            file is shorter than ``count`` lines.
    """
    with open(path, 'r', encoding='gbk') as f:
        # Read line by line instead of readlines(): only the previewed lines
        # are decoded, and a short file no longer raises IndexError.
        for _ in range(count):
            line = f.readline()
            if not line:
                # End of file reached before ``count`` lines were available.
                break
            print(line)
            print('\n\n')

def fun2(path='train3.txt', batch_size=100, *, drop_last=True):
    """Yield ``(data, labels)`` batches read from a UTF-8 text file.

    Every line is stripped and split on single spaces. The last token of a
    line becomes its label; all preceding tokens form the sample.

    Args:
        path: File to read. Defaults to the original hard-coded ``train3.txt``.
        batch_size: Number of lines per yielded batch (originally fixed at 100).
        drop_last: When true (the default, matching the previous behaviour),
            a trailing batch with fewer than ``batch_size`` lines is discarded.
            Pass ``False`` to receive that final partial batch as well.

    Yields:
        A ``(data, labels)`` tuple where ``data`` is a list of token lists and
        ``labels`` is the list of corresponding last tokens. Each batch uses
        its own list objects, so batches stay valid after the generator
        advances.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got {!r}'.format(batch_size))

    with open(path, 'r', encoding='utf-8') as f:
        data, labels = [], []
        for line in f:
            tokens = line.strip().split(' ')
            data.append(tokens[:-1])
            labels.append(tokens[-1])
            if len(data) == batch_size:
                yield data, labels
                # Start new lists rather than clearing the old ones: the
                # caller may still hold references to the batch just yielded.
                data, labels = [], []
        # Anything left here is a partial final batch.
        if data and not drop_last:
            yield data, labels


if __name__ == '__main__':
    # Smoke test: pull only the first batch from fun2() and report its sizes.
    g = fun2()

    # next() with a default replaces the direct g.__next__() call and avoids
    # an uncaught StopIteration when the generator yields no batch at all.
    first_batch = next(g, None)
    # Close the generator so the file it holds open is released right away.
    g.close()

    if first_batch is None:
        print('fun2() produced no batch')
    else:
        a, b = first_batch
        print(len(a))
        print(len(b))
